In [1]:
from tpot import TPOTRegressor
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

Here is the options section:

  • fileneme - name of file you want to proceed
  • histroy_depth - number of days to use to predict next value
  • header - number of header lines in data file (set None if file have no header)
  • delimiter - delimiter in file
  • names - names of fields in data file
  • prediction_filed - name of filed you want to use for prediction

In [28]:
filename='SPY Data.csv'
history_depth=3
header=1
delimiter=','
names=['Date','high','low','volume']
prediction_field='high'

In [29]:
bitcoins_daily=pd.read_csv(filename,delimiter=delimiter,header=header,names=names)
closes=bitcoins_daily[prediction_field].values.tolist()
features=np.array([closes[i:i+history_depth] for i in range(len(closes)) if i<(len(closes)-history_depth)])
labels=np.array([closes[i+history_depth] for i in range(len(closes)) if i<(len(closes)-history_depth)])
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.33, random_state=42)

In [30]:
tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
tpot.export('tpot_daily_bitcoins.py')


Version 0.7.5 of tpot is outdated. Version 0.8.3 was released 4 days ago.
Warning: xgboost.XGBRegressor is not available and will not be used by TPOT.
                                                                                                    
Generation 1 - Current best internal CV score: 1.0939697920746387
                                                                                                    
Generation 2 - Current best internal CV score: 1.0939697920746387
                                                                                                    
Generation 3 - Current best internal CV score: 1.0939697920746387
                                                                                                    
Generation 4 - Current best internal CV score: 1.0939697920746387
                                                                                                    
Generation 5 - Current best internal CV score: 1.0933662005972342
                                                                                                    
Best pipeline: RidgeCV(ZeroCount(SelectFromModel(RidgeCV(input_matrix), SelectFromModel__ExtraTreesRegressor__max_features=0.7, SelectFromModel__ExtraTreesRegressor__n_estimators=DEFAULT, SelectFromModel__threshold=0.2)))
1.49581414988